Radicalization Lab

Code sheet 1: Data preparation

Arda Ergin

Setup

devtools::load_all(here::here())
library(here)
library(haven)
library(readxl)
library(dplyr)

————-

Pilot

This is the data of 36 participants rating the vignettes on four dimensions.

Import

# Read the raw pilot responses (SPSS file) from data-raw/00_raw
d_pilot_0 <- here::here("data-raw", "00_raw", "0_pilot.sav") %>%
  haven::read_sav()

Exclusion

####### Informed Consent #######
# Keep only participants with a non-missing Consent value that equals 1
d_pilot_1 <- dplyr::filter(d_pilot_0, !is.na(Consent), Consent == 1)

## Number of participants excluded based on Informed Consent
nrow(d_pilot_0) - nrow(d_pilot_1)
## [1] 1
####### Progress #######
# Keep only participants whose Progress value is above 95
d_pilot_1_2 <- dplyr::filter(d_pilot_1, Progress > 95)

## Number of participants excluded based on Progress
nrow(d_pilot_1) - nrow(d_pilot_1_2)
## [1] 32
####### Duration #######
# Duration-based exclusion through the RadLab helper (bounds are printed below)
d_pilot_2 <- RadLab::duration_exclusion(d_pilot_1_2, "Duration__in_seconds_")
## Median duration: 845 seconds
## Upper bound: 1690 seconds
## Lower bound: 422.5 seconds
## Number of participants excluded: 8
## Warning: package 'ggplot2' was built under R version 4.3.2

Cleaning

####### Columns  #######
# Keep only the relevant columns (positions 19 to 102)
d_pilot_3 <- d_pilot_2 %>% subset(select = 19:102)


# Coerce every remaining column to numeric
d_pilot <- d_pilot_3 %>%
  lapply(as.numeric) %>%
  data.frame()

Means

Here, we calculate the mean rating of each vignette on each of the four dimensions.

# Calculating means and storing them:
# one mean per rating column, ignoring missing answers
v.means_raw_pilot <- colMeans(d_pilot, na.rm = TRUE)

# The rating columns are laid out vignette by vignette, with the rating
# dimensions of one vignette next to each other. The number of vignettes is
# therefore derived from the data instead of being hard-coded.
rating_dimensions <- c(
  'excluded', 
  'injustice', 
  'personal', 
  'violence')
n_dimensions <- length(rating_dimensions)
n_ratings <- length(v.means_raw_pilot)

# Fail loudly if the column selection no longer holds complete vignettes
stopifnot(n_ratings %% n_dimensions == 0)
n_pilot_vignettes <- n_ratings / n_dimensions

# Creating a data frame of means (one row per vignette):
v.means_pilot <- as.data.frame(
  matrix(0, 
         ncol = n_dimensions + 1, 
         nrow = n_pilot_vignettes)) 
colnames(v.means_pilot) <- c(rating_dimensions, 'vignette_id')

# Giving the order
v.means_pilot['vignette_id'] <- seq_len(n_pilot_vignettes)

# Loop to assign means:
# dimension i sits at positions i, i + 4, i + 8, ... of the raw means
for (i in seq_len(n_dimensions)) {
  v.means_pilot[i] <- v.means_raw_pilot[seq(i, n_ratings, n_dimensions)]
}

Matching with Overview Excel

This is the manually maintained Excel file. It contains the order of the vignettes in each study. Whenever a new study is added, this file needs to be updated accordingly.

# Importing the Excel File (the study overview workbook)
studies_all <- here::here("data-raw", "experiment_metadata.xlsx") %>%
  readxl::read_excel()
# Only selecting the actually used vignettes (the first 15 rows)
studies_all <- studies_all[1:15, ]

# Keep only the pilot vignettes that are listed in the overview file
v.means_pilot <- v.means_pilot %>%
  dplyr::filter(vignette_id %in% studies_all$Pilot)

# Min-max normalisation for the control variables:
# each rating dimension is rescaled to [0, 1] across the retained vignettes
v.means_pilot_normalised <- v.means_pilot %>%
  dplyr::mutate(dplyr::across(
    c(excluded, injustice, personal, violence), 
    ~ (. - min(.)) / (max(.) - min(.))))

v.means_pilot <- v.means_pilot_normalised

########## Vignette Names and Their ID ##########
# Attach the overview information to the normalised pilot means; the pilot
# vignette number is the key on both sides
vignettes_full <- base::merge(
  x = v.means_pilot,
  y = studies_all,
  by.x = "vignette_id",
  by.y = "Pilot"
)

########## Order: Studies ##########
# Build the vignette lookup table of one study.
#
# vignettes: the merged pilot/overview table (`vignettes_full`).
# order_col: position of the column that holds this study's presentation
#            order (NA where the vignette was not used in the study).
#
# Returns columns 2 to 6 of `vignettes` plus the order column (renamed to
# "order"), restricted to the vignettes used in the study and sorted by
# presentation order.
extract_study_order <- function(vignettes, order_col) {
  used_in_study <- !is.na(vignettes[[order_col]])
  study_order <- vignettes[used_in_study, c(2:6, order_col)]
  study_order <- study_order[order(study_order[[6]]), ]
  colnames(study_order)[6] <- "order"
  study_order
}

# Columns 7 to 14 of the merged table hold the orders of studies 1 to 8
o_study1 <- extract_study_order(vignettes_full, 7)
o_study2 <- extract_study_order(vignettes_full, 8)
o_study3 <- extract_study_order(vignettes_full, 9)
o_study4 <- extract_study_order(vignettes_full, 10)
o_study5 <- extract_study_order(vignettes_full, 11)
o_study6 <- extract_study_order(vignettes_full, 12)
o_study7 <- extract_study_order(vignettes_full, 13)
o_study8 <- extract_study_order(vignettes_full, 14)

————-

Study 1 (2019-2020)

Import

# Importing data: raw Study 1 responses (SPSS file)
d_study1_0 <- here::here("data-raw", "00_raw", "1_hartog.sav") %>%
  haven::read_spss()

Exclusion

####### Informed Consent #######
# Keep only participants with a non-missing Consent value that equals 1
d_study1_1 <- dplyr::filter(d_study1_0, !is.na(Consent), Consent == 1)

## Number of participants excluded based on Informed Consent
nrow(d_study1_0) - nrow(d_study1_1)
## [1] 0
####### Progress #######
# Keep only participants whose Progress value is above 95
d_study1_1_2 <- dplyr::filter(d_study1_1, Progress > 95)

## Number of participants excluded based on Progress
nrow(d_study1_1) - nrow(d_study1_1_2)
## [1] 23
####### Duration #######
# Duration-based exclusion through the RadLab helper (bounds are printed below)
d_study1_2 <- RadLab::duration_exclusion(d_study1_1_2, "Duration__in_seconds_")
## Median duration: 1307.5 seconds
## Upper bound: 2615 seconds
## Lower bound: 653.75 seconds
## Number of participants excluded: 35

Cleaning

####### Columns  #######
# Only Selecting the Relevant Columns (by position in the raw file)
d_study1_3 <- d_study1_2 %>% 
  subset(select = c(70:129, # Vignettes
                    139, # Condition
                    130, # Gender
                    131, # Age
                    34, # pol_liberal_conserv
                    35 # pol_left_right
                    ))

# Assigning ID to the participants:
d_study1_3$ID <- 
  # Number of Participants in Previous Studies:
  0 + ## --> i.e., this is the first study
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study1_3))

# Assigning experiment number to all participants in this dataset
d_study1_3$experiment <- 1

# Enforcing the column types through the RadLab helper
d_study1 <- RadLab::enforce_variable_types(d_study1_3)

Means plot

# Plot the vignette means of the 60 vignette columns (15 vignettes)
RadLab::plot_vignette_means(
  d_study1[, 1:60],
  name_data = o_study1,
  n_vignettes = 15
)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 1 data to long format using the Study 1 vignette order
d_study1_long_0 <- RadLab::wide_to_long(
  d_study1,
  name_data = o_study1,
  n_vignettes = 15
)

Means Plot (2)

# Vignette plots on the long data, with standard deviations as error measure
d_study1_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####


# Condition and gender are turned into labelled factors in one step
d_study1_long_0 <- d_study1_long_0 %>%
  dplyr::mutate(
    # Condition
    condition_f = factor(
      haven::labelled_spss(EmpathyCondition),
      levels = 1:2,
      labels = paste("Condition", 1:2)),
    # Gender
    gender_f = factor(
      haven::labelled_spss(gender),
      levels = 1:2,
      labels = c("Female", "Male")))

# Taking out the already converted variables
d_study1_long <- subset(
  d_study1_long_0,
  select = -c(EmpathyCondition, gender))

Save

# Drop the political orientation items, then store the cleaned long data
d_study1 <- subset(
  d_study1_long,
  select = -c(pol_left_right, pol_liberal_conserv))

save(
  d_study1,
  file = here::here("data-raw", "01_cleaned", "d_study1.rda"))

————-

Study 2 (2020-2021)

Import

# Importing data: raw Study 2 responses (SPSS file)
d_study2_0 <- here::here("data-raw", "00_raw", "2_meike.sav") %>%
  haven::read_spss()

Exclusion

####### Informed Consent #######
# Keep only participants with a non-missing Consent value that equals 1
d_study2_1 <- dplyr::filter(d_study2_0, !is.na(Consent), Consent == 1)

## Number of participants excluded based on Informed Consent
nrow(d_study2_0) - nrow(d_study2_1)
## [1] 0
####### Progress #######
# Keep only participants whose Progress value is above 95
d_study2_1_2 <- dplyr::filter(d_study2_1, Progress > 95)

## Number of participants excluded based on Progress
nrow(d_study2_1) - nrow(d_study2_1_2)
## [1] 4
####### Duration #######
# Duration-based exclusion through the RadLab helper (bounds are printed below)
d_study2_2 <- RadLab::duration_exclusion(d_study2_1_2, "Duration_seconds")
## Median duration: 1626.5 seconds
## Upper bound: 3253 seconds
## Lower bound: 813.25 seconds
## Number of participants excluded: 20

Cleaning

In this dataset, Meike included an “exclusion” and a “fair” measure in between the vignette measures. These make it difficult to select the relevant columns by position, so those columns are removed from the dataset first.

####### Columns  #######
# Only Selecting the Relevant Columns
d_study2_3 <- d_study2_2 %>% 
  # Drop the in-between "exclusion/fair" items. A logical mask is used
  # because `-grep()` would drop every column if nothing matched.
  subset(select = !grepl("exclusion_fair|eclusion_fair", names(.))) %>% 
  subset(select = -c(40:45)) %>% 
  subset(select = c(20:79,
                    3, # Condition
                    80, # Gender (gender)
                    81, # Age (age)
                    103, # liberal_conservative_rec
                    104 # left_right_rec
                    ))


# Assigning ID to the participants:
d_study2_3$ID <- 
  # Number of Participants in Previous Studies:
  length(unique(d_study1$ID)) + 
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study2_3))



# Assigning experiment number to all participants in this dataset
d_study2_3$experiment <- 2

# Enforcing the column types through the RadLab helper
d_study2 <- RadLab::enforce_variable_types(d_study2_3)

Means plot

# Plot the vignette means of the 60 vignette columns (15 vignettes)
RadLab::plot_vignette_means(
  d_study2[, 1:60],
  name_data = o_study2,
  n_vignettes = 15
)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 2 data to long format using the Study 2 vignette order
d_study2_long_0 <- RadLab::wide_to_long(
  d_study2,
  name_data = o_study2,
  n_vignettes = 15
)

Means Plot (2)

# Vignette plots on the long data, with standard deviations as error measure
d_study2_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####


# Condition and gender are turned into labelled factors in one step
d_study2_long_0 <- d_study2_long_0 %>%
  dplyr::mutate(
    # Condition
    condition_f = factor(
      haven::labelled_spss(Condition),
      levels = 1:3,
      labels = paste("Condition", 1:3)),
    # Gender
    gender_f = factor(
      haven::labelled_spss(gender),
      levels = 1:2,
      labels = c("Female", "Male")))


# Taking out the already converted variables
d_study2_long <- subset(
  d_study2_long_0,
  select = -c(Condition, gender))

Save

# Drop the political orientation items, then store the cleaned long data
d_study2 <- subset(
  d_study2_long,
  select = -c(liberal_conservative_rec, left_right_rec))

save(
  d_study2,
  file = here::here("data-raw", "01_cleaned", "d_study2.rda"))

————-

Study 3 (2020-2021)

Import

# Importing data: raw Study 3 responses (SPSS file)
d_study3_0 <- here::here("data-raw", "00_raw", "3_julius.sav") %>%
  haven::read_spss()

Exclusion

####### Informed Consent #######
# Keep only participants with a non-missing Consent value that equals 1
d_study3_1 <- dplyr::filter(d_study3_0, !is.na(Consent), Consent == 1)

## Number of participants excluded based on Informed Consent
nrow(d_study3_0) - nrow(d_study3_1)
## [1] 0
####### Progress #######
# Keep only participants whose Progress value is above 95
d_study3_1_2 <- dplyr::filter(d_study3_1, Progress > 95)

## Number of participants excluded based on Progress
nrow(d_study3_1) - nrow(d_study3_1_2)
## [1] 2
####### Duration #######
# Duration-based exclusion through the RadLab helper (bounds are printed below)
d_study3_2 <- RadLab::duration_exclusion(d_study3_1_2, "Duration__in_seconds_")
## Median duration: 1605 seconds
## Upper bound: 3210 seconds
## Lower bound: 802.5 seconds
## Number of participants excluded: 15

Cleaning

# Assigning ID to the participants:
d_study3_2$ID <- 
  # Number of Participants in Previous Studies:
  length(unique(d_study1$ID)) + 
  length(unique(d_study2$ID)) + 
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study3_2))


####### Cleaning & Columns  #######
# Only Selecting the Relevant Columns
d_study3_cleaning_1 <- d_study3_2 %>% 
  subset(select = grep("ID|P1_Q|P0_Q", names(.))) %>% 
  # Logical mask instead of `-grep()`: a negative index built from an empty
  # match would drop every column.
  subset(select = !grepl("e_", names(.)))


# Combining Two Conditions
# For every vignette i and action option j, the condition-specific columns
# (whose names contain "Q<i>_<j>") are summed into one column "Q<i>_<j>".
for (i in 1:15) {
  for (j in 1:4) {
    d_study3_cleaning_1 <- d_study3_cleaning_1 %>% RadLab::add_row_sums(
      var_name = paste0("Q", i, "_", j),
      # Items to sum up
      item_names = grep(
        paste0("Q", i, "_", j),
        names(d_study3_cleaning_1),
        value = TRUE),
      ignore_na = TRUE)
  }
}


# Getting Rid of these previous Columns
d_study3_cleaning_2 <- d_study3_cleaning_1 %>% subset(
  select = !grepl("P1|P0", names(.)))


# Demographics
d_study3_demographics <- d_study3_2 %>% 
  subset(select = c(
    ID,
    Condition, # Condition
    Gen_2, # Gender
    age_2, # Age
    Pol1_2, # pol_liberal_conserv
    Pol2_2 # pol_left_right
    ))

# Combining
d_study3_3 <- dplyr::left_join(
  d_study3_cleaning_2, 
  d_study3_demographics,
  by = "ID")

# Relocating ID (directly after the last vignette column)
d_study3_4 <- d_study3_3 %>% dplyr::relocate("ID", .after = "Q15_4")

# Assigning experiment number to all participants in this dataset
d_study3_4$experiment <- 3

# Enforcing the column types through the RadLab helper
d_study3 <- RadLab::enforce_variable_types(d_study3_4)

Means plot

# Plot the vignette means of the 60 vignette columns (15 vignettes)
RadLab::plot_vignette_means(
  d_study3[, 1:60],
  name_data = o_study3,
  n_vignettes = 15
)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 3 data to long format using the Study 3 vignette
# order. The helper is called with its namespace, as in Studies 1 and 2.
d_study3_long_0 <- d_study3 %>% RadLab::wide_to_long(
  name_data = o_study3,
  n_vignettes = 15)

Means Plot (2)

# Vignette plots on the long data, with standard deviations as error measure
d_study3_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####


# Condition, gender and age are derived in one step
d_study3_long_0 <- d_study3_long_0 %>%
  dplyr::mutate(
    # Condition (coded 0/1 in the raw data)
    condition_f = factor(
      haven::labelled_spss(Condition),
      levels = 0:1,
      labels = paste("Condition", 1:2)),
    # Gender
    gender_f = factor(
      haven::labelled_spss(Gen_2),
      levels = 1:2,
      labels = c("Female", "Male")), # Checked
    # Age
    age = age_2)


# Taking out the already converted variables
d_study3_long <- subset(
  d_study3_long_0,
  select = -c(Condition, Gen_2, age_2))

Save

# Drop the political orientation items, then store the cleaned long data
d_study3 <- subset(
  d_study3_long,
  select = -c(Pol1_2, Pol2_2))

save(
  d_study3,
  file = here::here("data-raw", "01_cleaned", "d_study3.rda"))

————-

Study 4 (2020-2021)

Import

# Importing data: raw Study 4 responses (SPSS file)
d_study4_0 <- here::here("data-raw", "00_raw", "4_barbara.sav") %>%
  haven::read_spss()

Exclusion

####### Informed Consent #######
# Keep only participants whose ConsentForm answer is present and equals the
# agreement option
d_study4_1 <- dplyr::filter(
  d_study4_0,
  !is.na(ConsentForm),
  ConsentForm == "Yes, I agree to participate in this study.")

## Number of participants excluded based on Informed Consent
nrow(d_study4_0) - nrow(d_study4_1)
## [1] 0
####### Progress #######
## No Progress in the dataset


####### Duration #######
## No duration in the dataset

# No further exclusion here; the object is only renamed so that the
# following steps use the same naming as the other studies:
d_study4_2 <- d_study4_1

Cleaning

Cleaning the columns in this study is a little more involved than in the other studies, for two reasons:
- The conditions of the 8th vignette have to be combined.
- Masculine and feminine ENNA (i.e., action options 4 & 5) have to be summed, since we are only interested in ENNA in general and not in the separate versions.

########## Assigning ID ##########
# Assigning ID to the participants:
d_study4_2$ID <- 
  # Number of Participants in Previous Studies:
  length(unique(d_study1$ID)) + 
  length(unique(d_study2$ID)) + 
  length(unique(d_study3$ID)) + 
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study4_2))


########## Q8 ##########
# Separating Q8 (all columns whose name contains "flyer"/"Flyer", plus ID)
d_study4_Q8 <- d_study4_2 %>% 
  subset(
    select = c(
      grep("flyer|Flyer", names(.)),
      ID))


# Q8 Combining the 6 Conditions into 5 new variables
# NOTE(review): `grep(i, ...)` matches every column whose name contains the
# digit i anywhere; confirm that the flyer column names carry no other
# digit from 1 to 5 (e.g. a condition number).
for (i in 1:5) {
  d_study4_Q8 <- d_study4_Q8 %>% RadLab::add_row_sums(
    var_name = paste("Q8", "flyer", i, sep = "_"), 
    item_names = colnames(d_study4_Q8)[
      grep(i, names(d_study4_Q8))],
    ignore_na = TRUE)
}


########## Action Options 4 & 5 ##########
# Selecting Only the Relevant Columns:
d_study4_cleaning_0 <- d_study4_2 %>% 
  subset(
    select = c(
      grep(paste0("Q", 1:16, collapse = "|"), names(.)),
      ID)) %>%
  # Taking out these "_mean_" variables.
  # Logical masks are used instead of `-grep()`: a negative index built from
  # an empty match would drop every column.
  subset(select = !grepl("_mean_", names(.))) %>%
  # Not having the Q8:
  subset(select = !grepl("flyer|Flyer", names(.)))


# Adding the 5 sum variables of Q8
d_study4_cleaning_1 <- dplyr::left_join(
  d_study4_cleaning_0,
  d_study4_Q8[, c("Q8_flyer_1",
                  "Q8_flyer_2",
                  "Q8_flyer_3",
                  "Q8_flyer_4",
                  "Q8_flyer_5",
                  "ID")],
  by = "ID")


# Combining Action Options 4 & 5 for all vignettes
for (i in 1:16) {
  
  # Extracting the vignette name: the second "_"-separated part of the first
  # column that belongs to vignette i
  questions <- grep(
    paste0("Q", i, "_"),
    names(d_study4_cleaning_1),
    value = TRUE)
  vignette_name <- strsplit(questions[1], "_")[[1]][2]
  
  # Creating the Variable "Q<i>_<name>_ENNA" as the row sum of options 4 and 5
  d_study4_cleaning_1 <- d_study4_cleaning_1 %>% RadLab::add_row_sums(
    var_name = paste0("Q", i, "_", 
                      vignette_name, 
                      "_ENNA"), 
    # Items to sum up
    item_names = c(
      # Option 4
      grep(
        paste0("Q", i, "_", ".*", "_4"), 
        names(d_study4_cleaning_1),
        value = TRUE),
      # Option 5
      grep(
        paste0("Q", i, "_", ".*", "_5"), 
        names(d_study4_cleaning_1),
        value = TRUE)),
    ignore_na = TRUE)
}


# Taking out the action options 4 & 5
# (logical mask instead of `-grep()`, which would drop every column if
# nothing matched)
d_study4_cleaning_2 <- d_study4_cleaning_1 %>% 
  subset(select = !grepl("_4|_5", names(.)))


# Changing the ENNA column names to "4"
colnames(d_study4_cleaning_2) <- gsub(
  "_ENNA", 
  "_4", 
  colnames(d_study4_cleaning_2))


########## Ordering ##########
# Saving only the column names to a new df
cn <- data.frame(
  questions = colnames(d_study4_cleaning_2),
  stringsAsFactors = FALSE)

# creating order based on question number:
# the first run of digits in a column name is its vignette number. Names
# without any digit (i.e. "ID") get NA explicitly, which avoids the
# "NAs introduced by coercion" warning.
has_number <- grepl("[0-9]", cn$questions)
cn$vignette_number <- NA_real_
cn$vignette_number[has_number] <- as.numeric(
  gsub(".*?([0-9]+).*", "\\1", cn$questions[has_number]))

# Creating a nested order:
## 1) vignette number (1-16), 
## then the action option number (1-4)
## (columns without a vignette number are placed last)
cn_ordered <- cn[order(cn$vignette_number, cn$questions), ]

# Creating a new ordered df
d_study4_cleaning_3 <- d_study4_cleaning_2[cn_ordered$questions]


########## Demographics ##########
d_study4_demographics <- subset(
  d_study4_2,
  select = c(
    ID,
    Cond, # Condition (6)
    GenderR, # Gender (Binary, already converted): 
    ## Male == 0, Female == 1
    Age, # Age
    # **Missing** # Political Liberal - Conservative 
    PolOri # Political Left (1) - Right (10)
    ))

# Combining Demographics with Vignettes (matched on participant ID)
d_study4_3 <- d_study4_cleaning_3 %>%
  dplyr::left_join(d_study4_demographics, by = "ID")

# Assigning experiment number to all participants in this dataset
d_study4_3$experiment <- 4

# Enforcing the column types through the RadLab helper
d_study4 <- d_study4_3 %>% RadLab::enforce_variable_types()

Means Plot

# Plot the vignette means of the 64 vignette columns (16 vignettes)
RadLab::plot_vignette_means(
  d_study4[, 1:64],
  name_data = o_study4,
  n_vignettes = 16
)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 4 data to long format using the Study 4 vignette
# order. The helper is called with its namespace, as in Studies 1 and 2.
d_study4_long_0 <- d_study4 %>% RadLab::wide_to_long(
  name_data = o_study4,
  n_vignettes = 16)

Means Plot (2)

# Vignette plots on the long data, with standard deviations as error measure
d_study4_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####


# Condition, gender and age are derived in one step
d_study4_long_0 <- d_study4_long_0 %>%
  dplyr::mutate(
    # Condition
    condition_f = factor(
      haven::labelled_spss(Cond),
      levels = 1:6,
      labels = paste("Condition", 1:6)),
    # Gender (coded 0 = Male, 1 = Female in this study)
    gender_f = factor(
      haven::labelled_spss(GenderR),
      levels = 0:1,
      labels = c("Male", "Female")), # Checked
    # Age
    age = Age)


# Taking out the already converted variables
d_study4_long <- subset(
  d_study4_long_0,
  select = -c(Cond, GenderR, Age))

Save

# Drop the political orientation item, then store the cleaned long data
d_study4 <- subset(d_study4_long, select = -c(PolOri))

save(
  d_study4,
  file = here::here("data-raw", "01_cleaned", "d_study4.rda"))

————-

Study 5 (2021-2022)

Import

# Importing data: raw Study 5 responses (SPSS file)
d_study5_0 <- here::here("data-raw", "00_raw", "5_brouwer_et_al.sav") %>%
  haven::read_spss()

Exclusion

####### Informed Consent #######
# Keep only participants with a non-missing Q1 value that equals 1
# (Q1 is used as the consent item in this dataset)
d_study5_1 <- dplyr::filter(d_study5_0, !is.na(Q1), Q1 == 1)

## Number of participants excluded based on Informed Consent
nrow(d_study5_0) - nrow(d_study5_1)
## [1] 31
####### Progress #######
# Keep only participants whose Progress value is above 95
d_study5_1_2 <- dplyr::filter(d_study5_1, Progress > 95)

## Number of participants excluded based on Progress
nrow(d_study5_1) - nrow(d_study5_1_2)
## [1] 86
####### Duration #######
# Duration-based exclusion through the RadLab helper (bounds are printed below)
d_study5_1_3 <- RadLab::duration_exclusion(d_study5_1_2, "Duration__in_seconds_")
## Median duration: 1504.5 seconds
## Upper bound: 3009 seconds
## Lower bound: 752.25 seconds
## Number of participants excluded: 71

####### Incorrect data #######
# This response was probably made during testing: later on we can see that
# the action options do not sum up to 100 for this specific participant.
d_study5_2 <- dplyr::filter(d_study5_1_3, ResponseId != "R_pyfF9CBCg3Zc35v")

## Check that we excluded that participant:
nrow(d_study5_1_3) - nrow(d_study5_2)
## [1] 1

Cleaning

Note: The vignettes 4-10 are three different conditions mixed together.

########## Assigning ID ##########
# Assigning ID to the participants:
d_study5_2$ID <- 
  # Number of Participants in Previous Studies:
  length(unique(d_study1$ID)) + 
  length(unique(d_study2$ID)) + 
  length(unique(d_study3$ID)) + 
  length(unique(d_study4$ID)) + 
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study5_2))


####### Columns #######
d_study5_3 <- d_study5_2


# Vignettes 4-10 exist in three versions each (one per condition), stored in
# the columns "Vig_<v>___Q_<o>", "Vig_<v>___Q_<o>.0" and "Vig_<v>___Q_<o>.1".
# For every vignette v and action option o, the three versions are summed
# row-wise (missing values ignored) into a single column named
# "<option>_<v>", e.g. "ina_4". The new columns are appended in the order
# ina, na, nna, enna within each vignette.
action_options <- c(ina = 1, na = 2, nna = 3, enna = 4)

for (vignette in 4:10) {
  for (option in names(action_options)) {
    item_stem <- paste0("Vig_", vignette, "___Q_", action_options[[option]])
    d_study5_3[[paste0(option, "_", vignette)]] <- rowSums(
      dplyr::select(
        d_study5_3,
        dplyr::all_of(c(
          item_stem,
          paste0(item_stem, ".0"),
          paste0(item_stem, ".1")))),
      na.rm = TRUE
    )
  }
}



# Vignettes 1, 2 and 3 have a single version each, so their four action
# option columns only need to be renamed to the common naming scheme
d_study5_3 <- dplyr::rename(
  d_study5_3,
  # Vignette 1
  ina_1 = Vig_1___Q_1, na_1 = Vig_1___Q_2,
  nna_1 = Vig_1___Q_3, enna_1 = Vig_1___Q_4,
  # Vignette 2
  ina_2 = Vig_2___Q_1, na_2 = Vig_2___Q_2,
  nna_2 = Vig_2___Q_3, enna_2 = Vig_2___Q_4,
  # Vignette 3
  ina_3 = Vig_3___Q_1, na_3 = Vig_3___Q_2,
  nna_3 = Vig_3___Q_3, enna_3 = Vig_3___Q_4
)


# Keep the 40 vignette columns (ina, na, nna, enna for vignettes 1 to 10, in
# that order) followed by ID and the demographic/condition columns
d_study5_4 <- dplyr::select(
  d_study5_3,
  dplyr::all_of(c(
    paste0(c("ina", "na", "nna", "enna"), "_", rep(1:10, each = 4)),
    "ID", "Gender", "Age", "Conditie"))
)

# Check the final column names to confirm the order
colnames(d_study5_4)
##  [1] "ina_1"    "na_1"     "nna_1"    "enna_1"   "ina_2"    "na_2"    
##  [7] "nna_2"    "enna_2"   "ina_3"    "na_3"     "nna_3"    "enna_3"  
## [13] "ina_4"    "na_4"     "nna_4"    "enna_4"   "ina_5"    "na_5"    
## [19] "nna_5"    "enna_5"   "ina_6"    "na_6"     "nna_6"    "enna_6"  
## [25] "ina_7"    "na_7"     "nna_7"    "enna_7"   "ina_8"    "na_8"    
## [31] "nna_8"    "enna_8"   "ina_9"    "na_9"     "nna_9"    "enna_9"  
## [37] "ina_10"   "na_10"    "nna_10"   "enna_10"  "ID"       "Gender"  
## [43] "Age"      "Conditie"
# Every participant in this dataset belongs to experiment 5
d_study5_4$experiment <- 5

# Enforcing the column types through the RadLab helper
d_study5 <- d_study5_4 %>% RadLab::enforce_variable_types()

Means Plot

# Plot the vignette means of the 40 vignette columns (10 vignettes)
RadLab::plot_vignette_means(
  d_study5[, 1:40],
  name_data = o_study5,
  n_vignettes = 10
)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 5 data to long format using the Study 5 vignette
# order. The helper is called with its namespace, as in Studies 1 and 2.
d_study5_long_0 <- d_study5 %>% RadLab::wide_to_long(
  name_data = o_study5,
  n_vignettes = 10)

Means Plot (2)

# Vignette plots on the long data, with standard deviations as error measure
d_study5_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####


# Condition, gender and age are derived in one step
d_study5_long_0 <- d_study5_long_0 %>%
  dplyr::mutate(
    # Condition (coded 0/1/2 in the raw data)
    condition_f = factor(
      haven::labelled_spss(Conditie),
      levels = 0:2,
      labels = paste("Condition", 1:3)),
    # Gender (coded 1 = Male, 2 = Female in this study)
    gender_f = factor(
      haven::labelled_spss(Gender),
      levels = 1:2,
      labels = c("Male", "Female")), # Checked
    # Age
    age = Age)


# Taking out the already converted variables
d_study5_long <- subset(
  d_study5_long_0,
  select = -c(Conditie, Gender, Age))

Save

# Store the cleaned long data under the final object name
d_study5 <- d_study5_long
save(
  d_study5,
  file = here::here("data-raw", "01_cleaned", "d_study5.rda"))

————-

Study 6 (2022-2023)

Import

# Importing data: raw Study 6 responses (SPSS file)
d_study6_0 <- here::here("data-raw", "00_raw", "6_cham_et_al.sav") %>%
  haven::read_spss()

Exclusion

####### Informed Consent #######
# Keep only participants with a non-missing Consentyesno value that equals 1
d_study6_1 <- dplyr::filter(
  d_study6_0,
  !is.na(Consentyesno),
  Consentyesno == 1)

## Number of participants excluded based on Informed Consent
nrow(d_study6_0) - nrow(d_study6_1)
## [1] 0
####### Progress #######
# Keep only participants whose Progress value is above 95
d_study6_1_2 <- dplyr::filter(d_study6_1, Progress > 95)

## Number of participants excluded based on Progress
nrow(d_study6_1) - nrow(d_study6_1_2)
## [1] 0
####### Duration #######
# Duration-based exclusion through the RadLab helper (bounds are printed below)
d_study6_2 <- RadLab::duration_exclusion(d_study6_1_2, "Duration__in_seconds_")
## Median duration: 1062.5 seconds
## Upper bound: 2125 seconds
## Lower bound: 531.25 seconds
## Number of participants excluded: 39

Cleaning

########## Assigning ID ##########
# Assigning ID to the participants:
d_study6_2$ID <- 
  # Number of Participants in Previous Studies:
  length(unique(d_study1$ID)) + 
  length(unique(d_study2$ID)) + 
  length(unique(d_study3$ID)) + 
  length(unique(d_study4$ID)) + 
  length(unique(d_study5$ID)) + 
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study6_2))


####### Columns  #######
# Only Selecting the Relevant Columns: every column whose name contains
# "Vig1" ... "Vig8", plus the participant ID
d_study6_3 <- subset(
  d_study6_2,
  select = c(
    grep("Vig1|Vig2|Vig3|Vig4|Vig5|Vig6|Vig7|Vig8", names(d_study6_2)),
    ID))

########## Demographics ##########
d_study6_demographics <- subset(
  d_study6_2,
  select = c(
    ID,
    # Condition missing?, # Condition
    Gender, # Gender (Not Binary) - 1: Male, 2: Female
    Age # Age
    # **Missing** # Political Liberal - Conservative 
    # **Missing** # Political Left - Right
    ))

# Combining Demographics with Vignettes (matched on participant ID)
d_study6_4 <- d_study6_3 %>%
  dplyr::left_join(d_study6_demographics, by = "ID")

# Assigning experiment number to all participants in this dataset
d_study6_4$experiment <- 6

# Enforcing the column types through the RadLab helper
d_study6 <- d_study6_4 %>% RadLab::enforce_variable_types()

Means Plot

# Plot the vignette means of the 32 vignette columns (8 vignettes)
RadLab::plot_vignette_means(
  d_study6[, 1:32],
  name_data = o_study6,
  n_vignettes = 8
)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 6 data to long format using the Study 6 vignette
# order. The helper is called with its namespace, as in Studies 1 and 2.
d_study6_long_0 <- d_study6 %>% RadLab::wide_to_long(
  name_data = o_study6,
  n_vignettes = 8)

Means Plot (2)

# Vignette plots on the long data, with standard deviations as error measure
d_study6_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####


# Condition, gender and age are derived in one step
d_study6_long_0 <- d_study6_long_0 %>%
  dplyr::mutate(
    # Condition
    #* No condition in this experiment: a constant one-level factor is used.
    condition_f = as.factor(0),
    # Gender (only codes 1 = Male and 2 = Female get a label)
    gender_f = factor(
      haven::labelled_spss(Gender),
      levels = 1:2,
      labels = c("Male", "Female")), # Checked
    # Age
    age = Age)


# Taking out the already converted variables
d_study6_long <- subset(
  d_study6_long_0,
  select = -c(Gender, Age))

Save

# Store the cleaned long data under the final object name
d_study6 <- d_study6_long
save(
  d_study6,
  file = here::here("data-raw", "01_cleaned", "d_study6.rda"))

————-

Study 7 (2022-2023)

Import

# Importing data: Study 7 responses (SPSS file)
d_study7_0 <- here::here("data-raw", "00_raw", "7_jordan.sav") %>%
  haven::read_spss()

Exclusion

No exclusion is applied here: the file appears to contain data that were already cleaned (this still needs to be confirmed).

# No exclusion step for this study; the data are only passed on under the
# "_2" name that the other studies use after their exclusion steps
d_study7_2 <- d_study7_0

Cleaning

########## Assigning ID ##########
# Assigning ID to the participants:
d_study7_2$ID <- 
  # Number of Participants in Previous Studies:
  length(unique(d_study1$ID)) + 
  length(unique(d_study2$ID)) + 
  length(unique(d_study3$ID)) + 
  length(unique(d_study4$ID)) + 
  length(unique(d_study5$ID)) + 
  length(unique(d_study6$ID)) + 
  # Number of participants in this study
  # (seq_len() stays valid even if no participant is left, unlike 1:nrow())
  seq_len(nrow(d_study7_2))


####### Columns #######
# Load necessary libraries
library(dplyr)
library(stringr)

# Rename the Study 7 rating columns to the shared naming scheme.
#
# Columns whose names start with "Q<number>_" are treated as vignette ratings.
# The number after "Q" is the vignette number, and the part after the last
# underscore selects the prefix:
#   _1 -> ina_<vignette>,  _2 -> na_<vignette>,
#   _3 -> nna_<vignette>,  _4 -> enna_<vignette>
# Matching columns with any other ending, and all non-matching columns, keep
# their names. Only names change; column order and contents are untouched.
#
# Args:
#   data: A data frame (or tibble).
# Returns:
#   `data` with the rating columns renamed.
rename_study7_columns <- function(data) {
  col_names <- names(data)
  is_question <- grepl("^Q[0-9]+_", col_names)
  question_cols <- col_names[is_question]

  # Vectorised base-R string handling keeps the result a character vector even
  # when no column matches (sapply()/mapply() return an empty list there).
  vignette_numbers <- sub("^Q([0-9]+)_.*$", "\\1", question_cols)
  suffixes <- sub("^.*_", "", question_cols)

  # Lookup from the trailing item number to the rating prefix
  prefixes <- c("1" = "ina", "2" = "na", "3" = "nna", "4" = "enna")
  is_rating <- suffixes %in% names(prefixes)

  new_names <- question_cols
  new_names[is_rating] <- paste(
    prefixes[suffixes[is_rating]],
    vignette_numbers[is_rating],
    sep = "_"
  )

  names(data)[is_question] <- new_names
  data
}

# Rename the rating columns of the Study 7 data
d_study7_3 <- rename_study7_columns(d_study7_2)

# Keep the rating columns vignette by vignette (ina, na, nna, enna for
# vignettes 1-7), followed by the participant ID and demographics
d_study7_4 <- d_study7_3 %>%
  dplyr::select(
    dplyr::all_of(
      paste(
        rep(c("ina", "na", "nna", "enna"), times = 7),
        rep(1:7, each = 4),
        sep = "_"
      )
    ),
    ID, Gender, Age
  )


# Print the column names to confirm that only the intended columns remain
names(d_study7_4)
##  [1] "ina_1"  "na_1"   "nna_1"  "enna_1" "ina_2"  "na_2"   "nna_2"  "enna_2"
##  [9] "ina_3"  "na_3"   "nna_3"  "enna_3" "ina_4"  "na_4"   "nna_4"  "enna_4"
## [17] "ina_5"  "na_5"   "nna_5"  "enna_5" "ina_6"  "na_6"   "nna_6"  "enna_6"
## [25] "ina_7"  "na_7"   "nna_7"  "enna_7" "ID"     "Gender" "Age"
# Tag every row with the experiment number
d_study7_4[["experiment"]] <- 7

# Run the package's type enforcement; the result is the wide Study 7 data
d_study7 <- RadLab::enforce_variable_types(d_study7_4)

Means Plot

# Pass the first 28 columns of d_study7 (the ina/na/nna/enna ratings for
# vignettes 1-7) to RadLab::plot_vignette_means(), together with o_study7
# (defined earlier in the file) and the vignette count.
d_study7[,1:28] %>% RadLab::plot_vignette_means(
  name_data = o_study7,
  n_vignettes = 7)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 7 data with wide_to_long(), using o_study7 and the
# vignette count; the result is stored as d_study7_long_0.
# NOTE(review): the exact long layout is defined by wide_to_long() — confirm there.
d_study7_long_0 <- d_study7 %>% wide_to_long(
  name_data = o_study7,
  n_vignettes = 7)

Means Plot (2)

# Plot the long-format data with error_type = "sd". The call is made at top
# level, so the returned object is printed; the rendered output shows the
# elements $ribbon, $faceted and $bars.
RadLab::plot_vignette_analysis(
  d_study7_long_0, 
  error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####

# Derive the analysis variables in a single step:
#  - condition_f: one-level factor "0"
#  - gender_f:    Gender as a factor, 1 = "Male", 2 = "Female" (checked)
#  - age:         lower-case copy of Age
d_study7_long_0 <- dplyr::mutate(
  d_study7_long_0,
  condition_f = as.factor(0),
  gender_f = factor(
    haven::labelled_spss(Gender),
    levels = 1:2,
    labels = c("Male", "Female")
  ),
  age = Age
)

# Drop the source columns now that their converted versions exist
d_study7_long <- dplyr::select(d_study7_long_0, -c(Gender, Age))

Save

# From here on d_study7 refers to the long-format data; it is saved under
# that object name in the cleaned-data folder.
d_study7 <- d_study7_long
save(
  list = "d_study7",
  file = here::here("data-raw", "01_cleaned", "d_study7.rda")
)

————-

Study 8 (2023-2024)

Import

# Read the raw SPSS file for Study 8
d_study8_0 <- here::here("data-raw", "00_raw", "8_deree.sav") %>%
  haven::read_spss()

Exclusion

Exclusion appears to have been applied already: the imported file seems to be the cleaned data, so it is carried forward unchanged.

# No rows are removed here: the imported data is copied as-is to d_study8_2,
# the object the cleaning step below works on.
d_study8_2 <- d_study8_0

Cleaning

########## Assigning ID ##########
# Participant IDs continue after the IDs used in Studies 1-7: the offset is
# the total number of unique IDs in those studies, and each row of this study
# gets offset + its row position.
# seq_len() is used instead of 1:nrow() so that an empty data set yields an
# empty ID vector rather than c(1, 0).
d_study8_2$ID <-
  sum(
    # Number of participants in previous studies:
    length(unique(d_study1$ID)),
    length(unique(d_study2$ID)),
    length(unique(d_study3$ID)),
    length(unique(d_study4$ID)),
    length(unique(d_study5$ID)),
    length(unique(d_study6$ID)),
    length(unique(d_study7$ID))
  ) +
  # Running index of the participants in this study:
  seq_len(nrow(d_study8_2))


####### Columns #######
# Load necessary libraries
library(dplyr)
library(stringr)

# Rename the Study 8 rating columns to the shared naming scheme.
#
# Columns whose names start with "V<number>_" are treated as vignette ratings.
# The number after "V" is the vignette number, and the part after the last
# underscore selects the prefix:
#   _1 -> ina_<vignette>,  _2 -> na_<vignette>,
#   _3 -> nna_<vignette>,  _4 -> enna_<vignette>
# Matching columns with any other ending, and all non-matching columns, keep
# their names. Only names change; column order and contents are untouched.
#
# Args:
#   data: A data frame (or tibble).
# Returns:
#   `data` with the rating columns renamed.
rename_study8_columns <- function(data) {
  col_names <- names(data)
  is_vignette <- grepl("^V[0-9]+_", col_names)
  vignette_cols <- col_names[is_vignette]

  # Vectorised base-R string handling keeps the result a character vector even
  # when no column matches (sapply()/mapply() return an empty list there).
  vignette_numbers <- sub("^V([0-9]+)_.*$", "\\1", vignette_cols)
  suffixes <- sub("^.*_", "", vignette_cols)

  # Lookup from the trailing item number to the rating prefix
  prefixes <- c("1" = "ina", "2" = "na", "3" = "nna", "4" = "enna")
  is_rating <- suffixes %in% names(prefixes)

  new_names <- vignette_cols
  new_names[is_rating] <- paste(
    prefixes[suffixes[is_rating]],
    vignette_numbers[is_rating],
    sep = "_"
  )

  names(data)[is_vignette] <- new_names
  data
}

# Rename the rating columns of the Study 8 data
d_study8_3 <- rename_study8_columns(d_study8_2)

# Keep the rating columns vignette by vignette (ina, na, nna, enna for
# vignettes 1-6), followed by the participant ID and demographics
d_study8_4 <- d_study8_3 %>%
  dplyr::select(
    dplyr::all_of(
      paste(
        rep(c("ina", "na", "nna", "enna"), times = 6),
        rep(1:6, each = 4),
        sep = "_"
      )
    ),
    ID, gen, age
  )

# Tag every row with the experiment number
d_study8_4[["experiment"]] <- 8

# Print the column names to confirm the renaming and selection
names(d_study8_4)
##  [1] "ina_1"      "na_1"       "nna_1"      "enna_1"     "ina_2"     
##  [6] "na_2"       "nna_2"      "enna_2"     "ina_3"      "na_3"      
## [11] "nna_3"      "enna_3"     "ina_4"      "na_4"       "nna_4"     
## [16] "enna_4"     "ina_5"      "na_5"       "nna_5"      "enna_5"    
## [21] "ina_6"      "na_6"       "nna_6"      "enna_6"     "ID"        
## [26] "gen"        "age"        "experiment"
# Run RadLab::enforce_variable_types() on the selected columns; the result is
# stored as d_study8.
# NOTE(review): presumably this coerces columns to their expected types
# (e.g. numeric) — confirm against the RadLab implementation.
d_study8 <- RadLab::enforce_variable_types(d_study8_4)

Means Plot

# Pass the first 24 columns of d_study8 (the ina/na/nna/enna ratings for
# vignettes 1-6) to RadLab::plot_vignette_means(), together with o_study8
# (defined earlier in the file) and the vignette count.
d_study8[,1:24] %>% RadLab::plot_vignette_means(
  name_data = o_study8,
  n_vignettes = 6)
## [1] "all rows sum up to a 100, so everything is good!"

Data preparation

Structuring the data

# Reshape the wide Study 8 data with wide_to_long(), using o_study8 and the
# vignette count; the result is stored as d_study8_long_0.
# NOTE(review): the exact long layout is defined by wide_to_long() — confirm there.
d_study8_long_0 <- d_study8 %>% wide_to_long(
  name_data = o_study8,
  n_vignettes = 6)

Means Plot (2)

# Plot the long-format data with error_type = "sd". The call is made at top
# level, so the returned object is printed; the rendered output shows the
# elements $ribbon, $faceted and $bars.
RadLab::plot_vignette_analysis(
  d_study8_long_0, 
  error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon

## 
## $faceted

## 
## $bars

Factorizing

##### Factorizing #####

# Derive the analysis variables in a single step:
#  - condition_f: one-level factor "0"
#  - gender_f:    gen as a factor, 1 = "Male", 2 = "Female" (checked)
# The age column is already named `age`, so it needs no conversion.
d_study8_long_0 <- dplyr::mutate(
  d_study8_long_0,
  condition_f = as.factor(0),
  gender_f = factor(
    haven::labelled_spss(gen),
    levels = 1:2,
    labels = c("Male", "Female")
  )
)

# Drop the source column now that its converted version exists
d_study8_long <- dplyr::select(d_study8_long_0, -c(gen))

Save

# From here on d_study8 refers to the long-format data; it is saved under
# that object name in the cleaned-data folder.
d_study8 <- d_study8_long
save(
  list = "d_study8",
  file = here::here("data-raw", "01_cleaned", "d_study8.rda")
)